scLANE Simulation Study - Simulated Data Quality ControlFirst we’ll load in the packages we need to tidy & analyze our simulation results.
library(dplyr) # data manipulation
library(scran) # scRNA tools
library(scater) # more scRNA tools
library(Seurat) # scRNA methods & data structures
library(ggplot2) # plots
library(targets) # pipeline tools
library(paletteer) # plot colors
library(patchwork) # plot combination
library(SingleCellExperiment) # scRNA data structures First we’ll load in the lung reference dataset from the {scRNAseq} package & process it like we would our simulated datasets. Please ignore the calls to gc() littered throughout the code, it was so difficult to get all these datasets into memory & processed without my R session crashing over & over again.
lung_data <- scRNAseq::ZilionisLungData(which = "human", filter = TRUE)
lung_data_clean <- lung_data[rowSums(counts(lung_data) > 0) >= 3, ] # genes in at least 3 cells
colnames(lung_data_clean) <- make.unique(colnames(lung_data_clean))
gc(full = TRUE)
# process
lung_data_clean <- logNormCounts(lung_data_clean)
var_decomp <- modelGeneVar(lung_data_clean)
top2k_hvgs <- getTopHVGs(var_decomp, n = 2000)
gc(full = TRUE)
lung_data_clean <- runPCA(lung_data_clean, subset_row = top2k_hvgs)
reducedDim(lung_data_clean, "PCAsub") <- reducedDim(lung_data_clean, "PCA")[, 1:30, drop = FALSE]
lung_data_clean <- runUMAP(lung_data_clean,
dimred = "PCAsub",
n_dimred = 1:30)
gc(full = TRUE)
g <- buildSNNGraph(lung_data_clean,
use.dimred = "PCAsub",
k = 30)
clusters <- igraph::cluster_louvain(graph = g)$membership
colLabels(lung_data_clean) <- factor(clusters)
gc(full = TRUE)
lung_data_clean <- as.Seurat(lung_data_clean,
counts = "counts",
data = "logcounts")
gc(full = TRUE)Next we create a table of summary statistics & dataset characteristics. We then create a {ggplot2}-friendly version of the table using {gridExtra} & {gtable}. This allows us to include the table as part of a plot object.
n_cells <- ncol(lung_data_clean)
n_genes <- nrow(lung_data_clean)
mean_count <- mean(lung_data_clean@assays$originalexp@counts)
med_count <- 0
sd_count <- sd(lung_data_clean@assays$originalexp@counts)
var_count <- sd_count^2 # faster
range_count <- range(lung_data_clean@assays$originalexp@counts)
sparsity_count <- mean(lung_data_clean@assays$originalexp@counts == 0)
summary_df <- data.frame(metric = c("Mean", "Median", "S.D.", "Variance", "Range", "Sparsity"),
value = c(round(mean_count, 2),
med_count,
round(sd_count, 2),
round(var_count, 2),
paste0("(", range_count[1], ", ", range_count[2], ")"),
paste0(round(sparsity_count, 4) * 100, "%")))
plot_table <- gridExtra::tableGrob(summary_df,
rows = NULL,
cols = c("Metric", "Value"),
theme = gridExtra::ttheme_minimal(core = list(fg_params = list(hjust = 0, x = 0.05)),
colhead = list(fg_params = list(hjust = 0, x = 0.05)))) %>%
gtable::gtable_add_grob(grobs = grid::rectGrob(gp = grid::gpar(fill = NA, lwd = 2)),
t = 2,
b = nrow(.),
l = 1,
r = ncol(.)) %>%
gtable::gtable_add_grob(grobs = grid::rectGrob(gp = grid::gpar(fill = NA, lwd = 2)),
t = 1,
l = 1,
r = ncol(.))
gc(full = TRUE)The first plot we want is a histogram of raw counts. Unfortunately, this is apparently very hard to do since our data is so large. We first convert our counts matrix to a file-backed matrix, and manually compute the bin ranges we’re interested in (with \(b = 20\) bins). Next, we iterate over the bins & sum the number of counts within each range. However, this too cause issues, so we also iterate over the columns of the matrix (cells), and sum within each cell first, then aggregate afterwards for the final per-bin value.
bin_df <- data.frame(lower_bound = seq(range_count[1], range_count[2], length.out = 20)) %>%
mutate(upper_bound = lead(lower_bound),
across(contains("bound"), \(x) round(x))) %>%
filter(!is.na(upper_bound)) %>%
mutate(n = NA_real_)
counts_mat <- bigstatsr::as_FBM(as.matrix(lung_data_clean@assays$originalexp@counts),
type = "integer",
is_read_only = TRUE)
gc(full = TRUE)
for (b in seq(nrow(bin_df))) {
bin_sums_tmp <- vector("numeric", length = ncol(counts_mat))
for (j in seq(ncol(counts_mat))) {
col_j <- counts_mat[, j]
if (b == 1) {
bin_sums_tmp[j] <- sum(col_j >= bin_df$lower_bound[b] & col_j <= bin_df$upper_bound[b])
} else {
bin_sums_tmp[j] <- sum(col_j > bin_df$lower_bound[b] & col_j <= bin_df$upper_bound[b])
}
rm(col_j)
}
bin_sum <- sum(bin_sums_tmp)
bin_df$n[b] <- bin_sum
gc(full = TRUE)
}
rm(counts_mat); gc(full = TRUE)
p0 <- ggplot(bin_df, aes(x = lower_bound, y = n)) +
geom_bar(stat = "identity", fill = "dodgerblue") +
scale_y_continuous(labels = scales::label_scientific()) +
scale_x_continuous(labels = scales::label_comma()) +
labs(x = "Raw Expression", y = "Frequency") +
theme_classic(base_size = 14)We’ll do the same for the normalized counts.
bin_df <- data.frame(lower_bound = seq(0, max(lung_data_clean@assays$originalexp@data), length.out = 20)) %>%
mutate(upper_bound = lead(lower_bound)) %>%
filter(!is.na(upper_bound)) %>%
mutate(n = NA_real_)
counts_mat <- bigstatsr::as_FBM(as.matrix(lung_data_clean@assays$originalexp@data),
type = "float",
is_read_only = TRUE)
gc(full = TRUE)
for (b in seq(nrow(bin_df))) {
bin_sums_tmp <- vector("numeric", length = ncol(counts_mat))
for (j in seq(ncol(counts_mat))) {
col_j <- counts_mat[, j]
if (b == 1) {
bin_sums_tmp[j] <- sum(col_j >= bin_df$lower_bound[b] & col_j <= bin_df$upper_bound[b])
} else {
bin_sums_tmp[j] <- sum(col_j > bin_df$lower_bound[b] & col_j <= bin_df$upper_bound[b])
}
rm(col_j)
}
bin_sum <- sum(bin_sums_tmp)
bin_df$n[b] <- bin_sum
gc(full = TRUE)
}
rm(counts_mat); gc(full = TRUE)
p1 <- ggplot(bin_df, aes(x = lower_bound, y = n)) +
geom_bar(stat = "identity", fill = "forestgreen") +
scale_y_continuous(labels = scales::label_scientific()) +
labs(x = "Normalized Expression", y = "Frequency") +
theme_classic(base_size = 14)Now that the tricky stuff is out of the way, we make a UMAP & PCA plot of the unsupervised clustering.
p2 <- data.frame(UMAP1 = lung_data_clean@reductions$UMAP@cell.embeddings[, 1],
UMAP2 = lung_data_clean@reductions$UMAP@cell.embeddings[, 2],
cluster = lung_data_clean$label) %>%
ggplot(aes(x = UMAP1, y = UMAP2, color = cluster)) +
geom_point() +
scale_color_manual(values = paletteer_d("ggsci::category20_d3")) +
labs(x = "UMAP 1", y = "UMAP 2", color = "Louvain Cluster") +
theme_classic(base_size = 14) +
theme(axis.text = element_blank(),
axis.ticks = element_blank()) +
guides(color = guide_legend(override.aes = list(size = 2)))
p3 <- data.frame(PC1 = lung_data_clean@reductions$PCA@cell.embeddings[, 1],
PC2 = lung_data_clean@reductions$PCA@cell.embeddings[, 2],
cluster = lung_data_clean$label) %>%
ggplot(aes(x = PC1, y = PC2, color = cluster)) +
geom_point() +
scale_color_manual(values = paletteer_d("ggsci::category20_d3")) +
labs(x = "PC 1", y = "PC 2", color = "Louvain Cluster") +
theme_classic(base_size = 14) +
theme(legend.position = "none",
axis.text = element_blank(),
axis.ticks = element_blank())We align everything using the {patchwork} package, save the figure, and plot it all.
p4a <- (p0 | p1) / (p2 | p3) +
plot_layout(guides = "collect")
p4b <- (p4a | plot_table) +
plot_layout(ncol = 2, widths = c(3, 1)) +
plot_annotation(title = paste0("Metrics for Lung Reference Dataset"))
ggsave(filename = "QC_lung_reference.pdf",
plot = p4b,
device = "pdf",
path = "/blue/rbacher/j.leary/repos/scLANE_Analysis/Figures/QC_Plots/",
width = 13,
height = 8,
units = "in",
dpi = "retina")
p4bWe load the single-subject simulated datasets into a list & turn them into {Seurat} objects for plotting.
# 100 cells
tar_load(lung_sim_DEG_01_CELLS_100)
tar_load(lung_sim_DEG_05_CELLS_100)
tar_load(lung_sim_DEG_10_CELLS_100)
tar_load(lung_sim_DEG_20_CELLS_100)
# 500 cells
tar_load(lung_sim_DEG_01_CELLS_500)
tar_load(lung_sim_DEG_05_CELLS_500)
tar_load(lung_sim_DEG_10_CELLS_500)
tar_load(lung_sim_DEG_20_CELLS_500)
# 1,000 cells
tar_load(lung_sim_DEG_01_CELLS_1000)
tar_load(lung_sim_DEG_05_CELLS_1000)
tar_load(lung_sim_DEG_10_CELLS_1000)
tar_load(lung_sim_DEG_20_CELLS_1000)
# 2,500 cells
tar_load(lung_sim_DEG_01_CELLS_2500)
tar_load(lung_sim_DEG_05_CELLS_2500)
tar_load(lung_sim_DEG_10_CELLS_2500)
tar_load(lung_sim_DEG_20_CELLS_2500)
# 5,000 cells
tar_load(lung_sim_DEG_01_CELLS_5000)
tar_load(lung_sim_DEG_05_CELLS_5000)
tar_load(lung_sim_DEG_10_CELLS_5000)
tar_load(lung_sim_DEG_20_CELLS_5000)
# coerce to list & process
obj_list <- purrr::map(ls(pattern = "lung_sim")[!grepl("balanced", ls(pattern = "lung_sim"))], function(sim) {
obj <- eval(as.symbol(sim))
obj <- as.Seurat(obj, counts = "counts", data = "logcounts")
obj@meta.data <- mutate(obj@meta.data,
perc_deg = paste0(as.numeric(stringr::str_remove(stringr::str_extract(sim, "lung_sim_DEG_.."), "lung_sim_DEG_")), "%"),
n_cells = as.character(ncol(obj)),
n_genes = as.character(nrow(obj)),
sce_name = sim)
return(obj)
})
rm(list = ls(pattern = "lung_sim")); gc(full = TRUE)Iterating over the datasets, we print each QC plot & save them to PDFs.
purrr::walk(obj_list, function(z) {
# gather metadata
obj_name <- z@meta.data$sce_name[1]
n_cells <- z@meta.data$n_cells[1]
n_genes <- z@meta.data$n_genes[1]
perc_deg <- z@meta.data$perc_deg[1]
# summary stat table
sparsity_count <- mean(z@assays$originalexp@counts == 0)
mean_count <- mean(z@assays$originalexp@counts)
med_count <- ifelse(sparsity_count > 0.5, 0, median(z@assays$originalexp@counts))
sd_count <- sd(z@assays$originalexp@counts)
var_count <- sd_count^2
range_count <- range(z@assays$originalexp@counts)
summary_df <- data.frame(metric = c("Mean", "Median", "S.D.", "Variance", "Range", "Sparsity"),
value = c(round(mean_count, 2),
round(med_count, 2),
round(sd_count, 2),
round(var_count, 2),
paste0("(", range_count[1], ", ", range_count[2], ")"),
paste0(round(sparsity_count, 4) * 100, "%")))
# create counts histogram
p0 <- data.frame(x = as.numeric(z@assays$originalexp@counts)) %>%
ggplot(aes(x = x)) +
geom_histogram(fill = "dodgerblue") +
scale_y_continuous(labels = scales::label_scientific()) +
scale_x_continuous(labels = scales::label_comma()) +
labs(x = "Raw Expression", y = "Frequency") +
theme_classic(base_size = 14)
# create log counts histogram
p1 <- data.frame(x = as.numeric(z@assays$originalexp@data)) %>%
ggplot(aes(x = x)) +
geom_histogram(fill = "forestgreen") +
scale_y_continuous(labels = scales::label_scientific()) +
labs(x = "Normalized Expression", y = "Frequency") +
theme_classic(base_size = 14)
# create UMAP by cluster
p2 <- data.frame(UMAP1 = z@reductions$UMAP@cell.embeddings[, 1],
UMAP2 = z@reductions$UMAP@cell.embeddings[, 2],
cluster = z$label) %>%
ggplot(aes(x = UMAP1, y = UMAP2, color = cluster)) +
geom_point() +
scale_color_manual(values = paletteer_d("ggsci::category20_d3")) +
labs(x = "UMAP 1", y = "UMAP 2", color = "Louvain Cluster") +
theme_classic(base_size = 14) +
theme(axis.text = element_blank(),
axis.ticks = element_blank()) +
guides(color = guide_legend(override.aes = list(size = 2)))
# create PCA of cell ordering
p3 <- data.frame(PC1 = z@reductions$PCA@cell.embeddings[, 1],
PC2 = z@reductions$PCA@cell.embeddings[, 2],
cell_time = z$cell_time_normed) %>%
ggplot(aes(x = PC1, y = PC2, color = cell_time)) +
geom_point() +
scale_color_gradientn(colors = paletteer_d("wesanderson::Zissou1")) +
labs(x = "PC 1", y = "PC 2", color = "True Ordering") +
theme_classic(base_size = 14) +
theme(axis.text = element_blank(),
axis.ticks = element_blank())
# table of simulation parameters
param_df <- data.frame(metric = c("Number of Cells", "Number of Genes", "% Dynamic Genes"),
value = c(as.character(n_cells), as.character(n_genes), perc_deg))
plot_table <- rbind(summary_df, param_df) %>%
gridExtra::tableGrob(rows = NULL,
cols = c("Metric", "Value"),
theme = gridExtra::ttheme_minimal(core = list(fg_params = list(hjust = 0, x = 0.05)),
colhead = list(fg_params = list(hjust = 0, x = 0.05)))) %>%
gtable::gtable_add_grob(grobs = grid::rectGrob(gp = grid::gpar(fill = NA, lwd = 2)),
t = 2,
b = nrow(.),
l = 1,
r = ncol(.)) %>%
gtable::gtable_add_grob(grobs = grid::rectGrob(gp = grid::gpar(fill = NA, lwd = 2)),
t = 1,
l = 1,
r = ncol(.))
# align everything
p4a <- (p0 | p1) / (p2 | p3) +
plot_layout(guides = "collect")
p4b <- (p4a | plot_table) +
plot_layout(ncol = 2, widths = c(3, 1)) +
plot_annotation(title = paste0("Metrics for dataset: ", obj_name))
# save & print plot
ggsave(filename = paste0("QC_", obj_name, ".pdf"),
plot = p4b,
device = "pdf",
path = "/blue/rbacher/j.leary/repos/scLANE_Analysis/Figures/QC_Plots/",
width = 13,
height = 8,
units = "in",
dpi = "retina")
print(p4b)
# cleanup
sink(tempfile())
rm(p0, p1, p2, p3, p4a, p4b, plot_table); gc(full = TRUE)
sink()
})rm(obj_list)We repeat the process for the multi-subject simulated datasets.
# 100 cells
tar_load(lung_sim_DEG_10_CELLS_100_balanced)
tar_load(lung_sim_DEG_20_CELLS_100_balanced)
tar_load(lung_sim_DEG_10_CELLS_100_unbalanced)
tar_load(lung_sim_DEG_20_CELLS_100_unbalanced)
# 500 cells
tar_load(lung_sim_DEG_10_CELLS_500_balanced)
tar_load(lung_sim_DEG_20_CELLS_500_balanced)
tar_load(lung_sim_DEG_10_CELLS_500_unbalanced)
tar_load(lung_sim_DEG_20_CELLS_500_unbalanced)
# 1,000 cells
tar_load(lung_sim_DEG_10_CELLS_1000_balanced)
tar_load(lung_sim_DEG_20_CELLS_1000_balanced)
tar_load(lung_sim_DEG_10_CELLS_1000_unbalanced)
tar_load(lung_sim_DEG_20_CELLS_1000_unbalanced)
# 2,500 cells
tar_load(lung_sim_DEG_10_CELLS_2500_balanced)
tar_load(lung_sim_DEG_20_CELLS_2500_balanced)
tar_load(lung_sim_DEG_10_CELLS_2500_unbalanced)
tar_load(lung_sim_DEG_20_CELLS_2500_unbalanced)
# 5,000 cells
tar_load(lung_sim_DEG_10_CELLS_5000_balanced)
tar_load(lung_sim_DEG_20_CELLS_5000_balanced)
tar_load(lung_sim_DEG_10_CELLS_5000_unbalanced)
tar_load(lung_sim_DEG_20_CELLS_5000_unbalanced)
# coerce to list & process
obj_list <- purrr::map(ls(pattern = "lung_sim*balanced"), function(sim) {
obj <- eval(as.symbol(sim))
obj <- as.Seurat(obj, counts = "counts", data = "logcounts")
obj@meta.data <- mutate(obj@meta.data,
perc_deg = paste0(as.numeric(stringr::str_remove(stringr::str_extract(sim, "lung_sim_DEG_.."), "lung_sim_DEG_")), "%"),
n_cells = as.character(ncol(obj)),
n_genes = as.character(nrow(obj)),
allocation = ifelse(grepl("_balanced", sim), "Balanced", "Unbalanced"),
sce_name = sim)
return(obj)
})
rm(list = ls(pattern = "lung_sim")); gc(full = TRUE)We add a UMAP of the subject identities, & a PCA plot of the true pseudotime split by subject when generating the QC plots for multi-subject data.
purrr::walk(obj_list, function(z) {
# gather metadata
obj_name <- z@meta.data$sce_name[1]
n_cells <- z@meta.data$n_cells[1]
n_genes <- z@meta.data$n_genes[1]
perc_deg <- z@meta.data$perc_deg[1]
allocation <- z@meta.data$allocation[1]
n_subjects <- length(unique(z@meta.data$subject))
# summary stat table
sparsity_count <- mean(z@assays$originalexp@counts == 0)
mean_count <- mean(z@assays$originalexp@counts)
med_count <- ifelse(sparsity_count > 0.5, 0, median(z@assays$originalexp@counts))
sd_count <- sd(z@assays$originalexp@counts)
var_count <- sd_count^2
range_count <- range(z@assays$originalexp@counts)
summary_df <- data.frame(metric = c("Mean", "Median", "S.D.", "Variance", "Range", "Sparsity"),
value = c(round(mean_count, 2),
round(med_count, 2),
round(sd_count, 2),
round(var_count, 2),
paste0("(", range_count[1], ", ", range_count[2], ")"),
paste0(round(sparsity_count, 4) * 100, "%")))
# create counts histogram
p0 <- data.frame(x = as.numeric(z@assays$originalexp@counts)) %>%
ggplot(aes(x = x)) +
geom_histogram(fill = "dodgerblue") +
scale_y_continuous(labels = scales::label_scientific()) +
scale_x_continuous(labels = scales::label_comma()) +
labs(x = "Raw Expression", y = "Frequency") +
theme_classic(base_size = 14)
# create log counts histogram
p1 <- data.frame(x = as.numeric(z@assays$originalexp@data)) %>%
ggplot(aes(x = x)) +
geom_histogram(fill = "forestgreen") +
scale_y_continuous(labels = scales::label_scientific()) +
labs(x = "Normalized Expression", y = "Frequency") +
theme_classic(base_size = 14)
# create UMAP by cluster & subject
legend_clust <- ggpubr::get_legend(p = (data.frame(UMAP1 = z@reductions$UMAP@cell.embeddings[, 1],
UMAP2 = z@reductions$UMAP@cell.embeddings[, 2],
cluster = z$label) %>%
ggplot(aes(x = UMAP1, y = UMAP2, color = cluster)) +
geom_point() +
scale_color_manual(values = paletteer_d("ggsci::category20_d3")[1:length(unique(z$label))]) +
labs(color = "Louvain\nCluster") +
theme_classic(base_size = 14) + theme(legend.text.align = 0.5)))
legend_subj <- ggpubr::get_legend(p = (data.frame(UMAP1 = z@reductions$UMAP@cell.embeddings[, 1],
UMAP2 = z@reductions$UMAP@cell.embeddings[, 2],
subject = z$subject) %>%
ggplot(aes(x = UMAP1, y = UMAP2, color = subject)) +
geom_point() +
scale_color_manual(values = paletteer_d("ggsci::category20_d3")[(length(unique(z$label)) + 1):20]) +
labs(color = "Subject") +
theme_classic(base_size = 14)))
p2a <- data.frame(UMAP1 = z@reductions$UMAP@cell.embeddings[, 1],
UMAP2 = z@reductions$UMAP@cell.embeddings[, 2],
cluster = z$label,
subject = z$subject) %>%
tidyr::pivot_longer(cols = c(cluster, subject), names_to = "ident", values_to = "ident_value") %>%
mutate(ident = case_when(ident == "cluster" ~ "Louvain Cluster",
TRUE ~ "Subject")) %>%
ggplot(aes(x = UMAP1, y = UMAP2, color = ident_value, group = ident)) +
facet_wrap(~ident) +
geom_point() +
scale_color_manual(values = paletteer_d("ggsci::category20_d3")) +
labs(x = "UMAP 1", y = "UMAP 2") +
theme_classic(base_size = 14) +
theme(legend.position = "none",
axis.text = element_blank(),
axis.ticks = element_blank())
p2b <- (p2a + (wrap_elements(legend_clust) / wrap_elements(legend_subj)) +
plot_layout(ncol = 2, widths = c(4, 1)))
# create PCA of cell ordering
p3a <- data.frame(PC1 = z@reductions$PCA@cell.embeddings[, 1],
PC2 = z@reductions$PCA@cell.embeddings[, 2],
cell_time = z$cell_time_normed,
subject = z$subject) %>%
ggplot(aes(x = PC1, y = PC2, color = cell_time)) +
facet_wrap(~subject) +
geom_point() +
scale_color_gradientn(colors = paletteer_d("wesanderson::Zissou1")) +
labs(x = "PC 1", y = "PC 2", color = "True Ordering") +
theme_classic(base_size = 14) +
theme(axis.ticks = element_blank(),
axis.text = element_blank())
p3b <- data.frame(PC1 = z@reductions$PCA@cell.embeddings[, 1],
PC2 = z@reductions$PCA@cell.embeddings[, 2],
cell_time = z$cell_time_normed) %>%
ggplot(aes(x = PC1, y = PC2, color = cell_time)) +
geom_point() +
scale_color_gradientn(colors = paletteer_d("wesanderson::Zissou1")) +
labs(x = "PC 1", y = "PC 2", color = "True Ordering") +
theme_classic(base_size = 14) +
theme(axis.ticks = element_blank(),
axis.text = element_blank())
p3c <- (p3a | p3b) +
plot_layout(guides = "collect", widths = c(3, 2), ncol = 2)
# table of simulation parameters
param_df <- data.frame(metric = c("N Cells", "N Genes", "% Dynamic", "Allocation", "N Subjects"),
value = c(as.character(n_cells),
as.character(n_genes),
perc_deg,
allocation,
n_subjects))
plot_table <- rbind(summary_df, param_df) %>%
gridExtra::tableGrob(rows = NULL,
cols = c("Metric", "Value"),
theme = gridExtra::ttheme_minimal(core = list(fg_params = list(hjust = 0, x = 0.05)),
colhead = list(fg_params = list(hjust = 0, x = 0.05)))) %>%
gtable::gtable_add_grob(grobs = grid::rectGrob(gp = grid::gpar(fill = NA, lwd = 2)),
t = 2,
b = nrow(.),
l = 1,
r = ncol(.)) %>%
gtable::gtable_add_grob(grobs = grid::rectGrob(gp = grid::gpar(fill = NA, lwd = 2)),
t = 1,
l = 1,
r = ncol(.))
# align everything
p4a <- ((p0 | p1 | plot_table) + plot_layout(widths = c(1, 1, 0.5))) / (p2b | p3c) +
plot_layout(nrow = 2) +
plot_annotation(title = paste0("Metrics for dataset: ", obj_name))
# save & print plot
ggsave(filename = paste0("QC_", obj_name, ".pdf"),
plot = p4a,
device = "pdf",
path = "/blue/rbacher/j.leary/repos/scLANE_Analysis/Figures/QC_Plots/",
width = 13,
height = 9,
units = "in",
dpi = "retina")
print(p4a)
# cleanup
sink(tempfile())
rm(p0, p1, p2, p3, p4a, plot_table); gc(full = TRUE)
sink()
})
rm(obj_list)Next let’s examine the pancreas reference dataset. Since it’s smaller than the lung reference, we don’t need to resort to trickery to generate our histograms.
panc_data <- scRNAseq::BaronPancreasData(which = "human")
panc_data_clean <- panc_data[rowSums(counts(panc_data) > 0) >= 3, ] # genes in at least 3 cells
panc_data_clean <- logNormCounts(panc_data_clean)
var_decomp <- modelGeneVar(panc_data_clean)
top2k_hvgs <- getTopHVGs(var_decomp, n = 2000)
panc_data_clean <- runPCA(panc_data_clean, subset_row = top2k_hvgs)
reducedDim(panc_data_clean, "PCAsub") <- reducedDim(panc_data_clean, "PCA")[, 1:30, drop = FALSE]
panc_data_clean <- runUMAP(panc_data_clean,
dimred = "PCAsub",
n_dimred = 1:30)
g <- buildSNNGraph(panc_data_clean,
use.dimred = "PCAsub",
k = 30)
clusters <- igraph::cluster_louvain(graph = g)$membership
colLabels(panc_data_clean) <- factor(clusters)
panc_data_clean <- as.Seurat(panc_data_clean,
counts = "counts",
data = "logcounts")We re-create the summary statistics table for the pancreas reference.
n_cells <- ncol(panc_data_clean)
n_genes <- nrow(panc_data_clean)
mean_count <- mean(panc_data_clean@assays$originalexp@counts)
med_count <- 0
sd_count <- sd(panc_data_clean@assays$originalexp@counts)
var_count <- sd_count^2 # faster
range_count <- range(panc_data_clean@assays$originalexp@counts)
sparsity_count <- mean(panc_data_clean@assays$originalexp@counts == 0)
summary_df <- data.frame(metric = c("Mean", "Median", "S.D.", "Variance", "Range", "Sparsity"),
value = c(round(mean_count, 2),
med_count,
round(sd_count, 2),
round(var_count, 2),
paste0("(", range_count[1], ", ", range_count[2], ")"),
paste0(round(sparsity_count, 4) * 100, "%")))
plot_table <- gridExtra::tableGrob(summary_df, rows = NULL,
cols = c("Metric", "Value"),
theme = gridExtra::ttheme_minimal(core = list(fg_params = list(hjust = 0, x = 0.05)),
colhead = list(fg_params = list(hjust = 0, x = 0.05)))) %>%
gtable::gtable_add_grob(grobs = grid::rectGrob(gp = grid::gpar(fill = NA, lwd = 2)),
t = 2,
b = nrow(.),
l = 1,
r = ncol(.)) %>%
gtable::gtable_add_grob(grobs = grid::rectGrob(gp = grid::gpar(fill = NA, lwd = 2)),
t = 1,
l = 1,
r = ncol(.))
gc(full = TRUE)We create the two histograms in the usual way.
p0 <- data.frame(x = as.numeric(panc_data_clean@assays$originalexp@counts)) %>%
ggplot(aes(x = x)) +
geom_histogram(fill = "dodgerblue") +
scale_y_continuous(labels = scales::label_scientific()) +
scale_x_continuous(labels = scales::label_comma()) +
labs(x = "Raw Expression", y = "Frequency") +
theme_classic(base_size = 14)
p1 <- data.frame(x = as.numeric(panc_data_clean@assays$originalexp@data)) %>%
ggplot(aes(x = x)) +
geom_histogram(fill = "forestgreen") +
scale_y_continuous(labels = scales::label_scientific()) +
labs(x = "Normalized Expression", y = "Frequency") +
theme_classic(base_size = 14)The UMAP & PCA plots are generated as before.
p2 <- data.frame(UMAP1 = panc_data_clean@reductions$UMAP@cell.embeddings[, 1],
UMAP2 = panc_data_clean@reductions$UMAP@cell.embeddings[, 2],
cluster = panc_data_clean$label) %>%
ggplot(aes(x = UMAP1, y = UMAP2, color = cluster)) +
geom_point() +
scale_color_manual(values = paletteer_d("ggsci::category20_d3")) +
labs(x = "UMAP 1", y = "UMAP 2", color = "Louvain Cluster") +
theme_classic(base_size = 14) +
theme(axis.text = element_blank(),
axis.ticks = element_blank()) +
guides(color = guide_legend(override.aes = list(size = 2)))
p3 <- data.frame(PC1 = panc_data_clean@reductions$PCA@cell.embeddings[, 1],
PC2 = panc_data_clean@reductions$PCA@cell.embeddings[, 2],
cluster = panc_data_clean$label) %>%
ggplot(aes(x = PC1, y = PC2, color = cluster)) +
geom_point() +
scale_color_manual(values = paletteer_d("ggsci::category20_d3")) +
labs(x = "PC 1", y = "PC 2", color = "Louvain Cluster") +
theme_classic(base_size = 14) +
theme(legend.position = "none",
axis.text = element_blank(),
axis.ticks = element_blank())Lastly, we align everything & plot it.
p4a <- (p0 | p1) / (p2 | p3) +
plot_layout(guides = "collect")
p4b <- (p4a | plot_table) +
plot_layout(ncol = 2, widths = c(3, 1)) +
plot_annotation(title = paste0("Metrics for Pancreas Reference Dataset"))
ggsave(filename = "QC_panc_reference.pdf",
plot = p4b,
device = "pdf",
path = "/blue/rbacher/j.leary/repos/scLANE_Analysis/Figures/QC_Plots/",
width = 13,
height = 8,
units = "in",
dpi = "retina")
p4bHere we bring the single subject simulations from the pancreas reference into memory.
# 100 cells
tar_load(panc_sim_DEG_01_CELLS_100)
tar_load(panc_sim_DEG_05_CELLS_100)
tar_load(panc_sim_DEG_10_CELLS_100)
tar_load(panc_sim_DEG_20_CELLS_100)
# 500 cells
tar_load(panc_sim_DEG_01_CELLS_500)
tar_load(panc_sim_DEG_05_CELLS_500)
tar_load(panc_sim_DEG_10_CELLS_500)
tar_load(panc_sim_DEG_20_CELLS_500)
# 1,000 cells
tar_load(panc_sim_DEG_01_CELLS_1000)
tar_load(panc_sim_DEG_05_CELLS_1000)
tar_load(panc_sim_DEG_10_CELLS_1000)
tar_load(panc_sim_DEG_20_CELLS_1000)
# 2,500 cells
tar_load(panc_sim_DEG_01_CELLS_2500)
tar_load(panc_sim_DEG_05_CELLS_2500)
tar_load(panc_sim_DEG_10_CELLS_2500)
tar_load(panc_sim_DEG_20_CELLS_2500)
# 5,000 cells
tar_load(panc_sim_DEG_01_CELLS_5000)
tar_load(panc_sim_DEG_05_CELLS_5000)
tar_load(panc_sim_DEG_10_CELLS_5000)
tar_load(panc_sim_DEG_20_CELLS_5000)
# coerce to list & process
obj_list <- purrr::map(ls(pattern = "panc_sim")[!grepl("balanced", ls(pattern = "panc_sim"))], function(sim) {
obj <- eval(as.symbol(sim))
obj <- as.Seurat(obj, counts = "counts", data = "logcounts")
obj@meta.data <- mutate(obj@meta.data,
perc_deg = paste0(as.numeric(stringr::str_remove(stringr::str_extract(sim, "panc_sim_DEG_.."), "panc_sim_DEG_")), "%"),
n_cells = as.character(ncol(obj)),
n_genes = as.character(nrow(obj)),
sce_name = sim)
return(obj)
})
rm(list = ls(pattern = "panc_sim")); gc(full = TRUE)Let’s generate the plots for each dataset.
purrr::walk(obj_list, function(z) {
# gather metadata
obj_name <- z@meta.data$sce_name[1]
n_cells <- z@meta.data$n_cells[1]
n_genes <- z@meta.data$n_genes[1]
perc_deg <- z@meta.data$perc_deg[1]
# summary stat table
sparsity_count <- mean(z@assays$originalexp@counts == 0)
mean_count <- mean(z@assays$originalexp@counts)
med_count <- ifelse(sparsity_count > 0.5, 0, median(z@assays$originalexp@counts))
sd_count <- sd(z@assays$originalexp@counts)
var_count <- sd_count^2
range_count <- range(z@assays$originalexp@counts)
summary_df <- data.frame(metric = c("Mean", "Median", "S.D.", "Variance", "Range", "Sparsity"),
value = c(round(mean_count, 2),
round(med_count, 2),
round(sd_count, 2),
round(var_count, 2),
paste0("(", range_count[1], ", ", range_count[2], ")"),
paste0(round(sparsity_count, 4) * 100, "%")))
# create counts histogram
p0 <- data.frame(x = as.numeric(z@assays$originalexp@counts)) %>%
ggplot(aes(x = x)) +
geom_histogram(fill = "dodgerblue") +
scale_y_continuous(labels = scales::label_scientific()) +
scale_x_continuous(labels = scales::label_comma()) +
labs(x = "Raw Expression", y = "Frequency") +
theme_classic(base_size = 14)
# create log counts histogram
p1 <- data.frame(x = as.numeric(z@assays$originalexp@data)) %>%
ggplot(aes(x = x)) +
geom_histogram(fill = "forestgreen") +
scale_y_continuous(labels = scales::label_scientific()) +
labs(x = "Normalized Expression", y = "Frequency") +
theme_classic(base_size = 14)
# create UMAP by cluster
p2 <- data.frame(UMAP1 = z@reductions$UMAP@cell.embeddings[, 1],
UMAP2 = z@reductions$UMAP@cell.embeddings[, 2],
cluster = z$label) %>%
ggplot(aes(x = UMAP1, y = UMAP2, color = cluster)) +
geom_point() +
scale_color_manual(values = paletteer_d("ggsci::category20_d3")) +
labs(x = "UMAP 1", y = "UMAP 2", color = "Louvain Cluster") +
theme_classic(base_size = 14) +
theme(axis.text = element_blank(),
axis.ticks = element_blank()) +
guides(color = guide_legend(override.aes = list(size = 2)))
# create PCA of cell ordering
p3 <- data.frame(PC1 = z@reductions$PCA@cell.embeddings[, 1],
PC2 = z@reductions$PCA@cell.embeddings[, 2],
cell_time = z$cell_time_normed) %>%
ggplot(aes(x = PC1, y = PC2, color = cell_time)) +
geom_point() +
scale_color_gradientn(colors = paletteer_d("wesanderson::Zissou1")) +
labs(x = "PC 1", y = "PC 2", color = "True Ordering") +
theme_classic(base_size = 14) +
theme(axis.text = element_blank(),
axis.ticks = element_blank())
# table of simulation parameters
param_df <- data.frame(metric = c("Number of Cells", "Number of Genes", "% Dynamic Genes"),
value = c(as.character(n_cells), as.character(n_genes), perc_deg))
plot_table <- rbind(summary_df, param_df) %>%
gridExtra::tableGrob(rows = NULL,
cols = c("Metric", "Value"),
theme = gridExtra::ttheme_minimal(core = list(fg_params = list(hjust = 0, x = 0.05)),
colhead = list(fg_params = list(hjust = 0, x = 0.05)))) %>%
gtable::gtable_add_grob(grobs = grid::rectGrob(gp = grid::gpar(fill = NA, lwd = 2)),
t = 2,
b = nrow(.),
l = 1,
r = ncol(.)) %>%
gtable::gtable_add_grob(grobs = grid::rectGrob(gp = grid::gpar(fill = NA, lwd = 2)),
t = 1,
l = 1,
r = ncol(.))
# align everything
p4a <- (p0 | p1) / (p2 | p3) +
plot_layout(guides = "collect")
p4b <- (p4a | plot_table) +
plot_layout(ncol = 2, widths = c(3, 1)) +
plot_annotation(title = paste0("Metrics for dataset: ", obj_name))
# save & print plot
ggsave(filename = paste0("QC_", obj_name, ".pdf"),
plot = p4b,
device = "pdf",
path = "/blue/rbacher/j.leary/repos/scLANE_Analysis/Figures/QC_Plots/",
width = 13,
height = 8,
units = "in",
dpi = "retina")
print(p4b)
# cleanup
sink(tempfile())
rm(p0, p1, p2, p3, p4a, p4b, plot_table); gc(full = TRUE)
sink()
})rm(obj_list)We load in the multi-subject simulated datasets from the pancreas reference.
# 100 cells
tar_load(panc_sim_DEG_10_CELLS_100_balanced)
tar_load(panc_sim_DEG_20_CELLS_100_balanced)
tar_load(panc_sim_DEG_10_CELLS_100_unbalanced)
tar_load(panc_sim_DEG_20_CELLS_100_unbalanced)
# 500 cells
tar_load(panc_sim_DEG_10_CELLS_500_balanced)
tar_load(panc_sim_DEG_20_CELLS_500_balanced)
tar_load(panc_sim_DEG_10_CELLS_500_unbalanced)
tar_load(panc_sim_DEG_20_CELLS_500_unbalanced)
# 1,000 cells
tar_load(panc_sim_DEG_10_CELLS_1000_balanced)
tar_load(panc_sim_DEG_20_CELLS_1000_balanced)
tar_load(panc_sim_DEG_10_CELLS_1000_unbalanced)
tar_load(panc_sim_DEG_20_CELLS_1000_unbalanced)
# 2,500 cells
tar_load(panc_sim_DEG_10_CELLS_2500_balanced)
tar_load(panc_sim_DEG_20_CELLS_2500_balanced)
tar_load(panc_sim_DEG_10_CELLS_2500_unbalanced)
tar_load(panc_sim_DEG_20_CELLS_2500_unbalanced)
# 5,000 cells
tar_load(panc_sim_DEG_10_CELLS_5000_balanced)
tar_load(panc_sim_DEG_20_CELLS_5000_balanced)
tar_load(panc_sim_DEG_10_CELLS_5000_unbalanced)
tar_load(panc_sim_DEG_20_CELLS_5000_unbalanced)
# coerce to list & process
obj_list <- purrr::map(ls(pattern = "panc_sim_*balanced"), function(sim) {
obj <- eval(as.symbol(sim))
obj <- as.Seurat(obj, counts = "counts", data = "logcounts")
obj@meta.data <- mutate(obj@meta.data,
perc_deg = paste0(as.numeric(stringr::str_remove(stringr::str_extract(sim, "panc_sim_DEG_.."), "panc_sim_DEG_")), "%"),
n_cells = as.character(ncol(obj)),
n_genes = as.character(nrow(obj)),
allocation = ifelse(grepl("_balanced", sim), "Balanced", "Unbalanced"),
sce_name = sim)
return(obj)
})
rm(list = ls(pattern = "panc_sim")); gc(full = TRUE)We regenerate the slightly more complex multi-subject figures as we did for the lung dataset.
purrr::walk(obj_list, function(z) {
# gather metadata
obj_name <- z@meta.data$sce_name[1]
n_cells <- z@meta.data$n_cells[1]
n_genes <- z@meta.data$n_genes[1]
perc_deg <- z@meta.data$perc_deg[1]
allocation <- z@meta.data$allocation[1]
n_subjects <- length(unique(z@meta.data$subject))
# summary stat table
sparsity_count <- mean(z@assays$originalexp@counts == 0)
mean_count <- mean(z@assays$originalexp@counts)
med_count <- ifelse(sparsity_count > 0.5, 0, median(z@assays$originalexp@counts))
sd_count <- sd(z@assays$originalexp@counts)
var_count <- sd_count^2
range_count <- range(z@assays$originalexp@counts)
summary_df <- data.frame(metric = c("Mean", "Median", "S.D.", "Variance", "Range", "Sparsity"),
value = c(round(mean_count, 2),
round(med_count, 2),
round(sd_count, 2),
round(var_count, 2),
paste0("(", range_count[1], ", ", range_count[2], ")"),
paste0(round(sparsity_count, 4) * 100, "%")))
# create counts histogram
p0 <- data.frame(x = as.numeric(z@assays$originalexp@counts)) %>%
ggplot(aes(x = x)) +
geom_histogram(fill = "dodgerblue") +
scale_y_continuous(labels = scales::label_scientific()) +
scale_x_continuous(labels = scales::label_comma()) +
labs(x = "Raw Expression", y = "Frequency") +
theme_classic(base_size = 14)
# create log counts histogram
p1 <- data.frame(x = as.numeric(z@assays$originalexp@data)) %>%
ggplot(aes(x = x)) +
geom_histogram(fill = "forestgreen") +
scale_y_continuous(labels = scales::label_scientific()) +
labs(x = "Normalized Expression", y = "Frequency") +
theme_classic(base_size = 14)
# create UMAP by cluster & subject
legend_clust <- ggpubr::get_legend(p = (data.frame(UMAP1 = z@reductions$UMAP@cell.embeddings[, 1],
UMAP2 = z@reductions$UMAP@cell.embeddings[, 2],
cluster = z$label) %>%
ggplot(aes(x = UMAP1, y = UMAP2, color = cluster)) +
geom_point() +
scale_color_manual(values = paletteer_d("ggsci::category20_d3")[1:length(unique(z$label))]) +
labs(color = "Louvain\nCluster") +
theme_classic(base_size = 14) + theme(legend.text.align = 0.5)))
legend_subj <- ggpubr::get_legend(p = (data.frame(UMAP1 = z@reductions$UMAP@cell.embeddings[, 1],
UMAP2 = z@reductions$UMAP@cell.embeddings[, 2],
subject = z$subject) %>%
ggplot(aes(x = UMAP1, y = UMAP2, color = subject)) +
geom_point() +
scale_color_manual(values = paletteer_d("ggsci::category20_d3")[(length(unique(z$label)) + 1):20]) +
labs(color = "Subject") +
theme_classic(base_size = 14)))
p2a <- data.frame(UMAP1 = z@reductions$UMAP@cell.embeddings[, 1],
UMAP2 = z@reductions$UMAP@cell.embeddings[, 2],
cluster = z$label,
subject = z$subject) %>%
tidyr::pivot_longer(cols = c(cluster, subject), names_to = "ident", values_to = "ident_value") %>%
mutate(ident = case_when(ident == "cluster" ~ "Louvain Cluster",
TRUE ~ "Subject")) %>%
ggplot(aes(x = UMAP1, y = UMAP2, color = ident_value, group = ident)) +
facet_wrap(~ident) +
geom_point() +
scale_color_manual(values = paletteer_d("ggsci::category20_d3")) +
labs(x = "UMAP 1", y = "UMAP 2") +
theme_classic(base_size = 14) +
theme(legend.position = "none",
axis.text = element_blank(),
axis.ticks = element_blank())
p2b <- (p2a + (wrap_elements(legend_clust) / wrap_elements(legend_subj)) +
plot_layout(ncol = 2, widths = c(4, 1)))
# create PCA of cell ordering
p3a <- data.frame(PC1 = z@reductions$PCA@cell.embeddings[, 1],
PC2 = z@reductions$PCA@cell.embeddings[, 2],
cell_time = z$cell_time_normed,
subject = z$subject) %>%
ggplot(aes(x = PC1, y = PC2, color = cell_time)) +
facet_wrap(~subject) +
geom_point() +
scale_color_gradientn(colors = paletteer_d("wesanderson::Zissou1")) +
labs(x = "PC 1", y = "PC 2", color = "True Ordering") +
theme_classic(base_size = 14) +
theme(axis.ticks = element_blank(),
axis.text = element_blank())
p3b <- data.frame(PC1 = z@reductions$PCA@cell.embeddings[, 1],
PC2 = z@reductions$PCA@cell.embeddings[, 2],
cell_time = z$cell_time_normed) %>%
ggplot(aes(x = PC1, y = PC2, color = cell_time)) +
geom_point() +
scale_color_gradientn(colors = paletteer_d("wesanderson::Zissou1")) +
labs(x = "PC 1", y = "PC 2", color = "True Ordering") +
theme_classic(base_size = 14) +
theme(axis.ticks = element_blank(),
axis.text = element_blank())
p3c <- (p3a | p3b) +
plot_layout(guides = "collect", widths = c(3, 2), ncol = 2)
# table of simulation parameters
param_df <- data.frame(metric = c("N Cells", "N Genes", "% Dynamic", "Allocation", "N Subjects"),
value = c(as.character(n_cells),
as.character(n_genes),
perc_deg,
allocation,
n_subjects))
plot_table <- rbind(summary_df, param_df) %>%
gridExtra::tableGrob(rows = NULL,
cols = c("Metric", "Value"),
theme = gridExtra::ttheme_minimal(core = list(fg_params = list(hjust = 0, x = 0.05)),
colhead = list(fg_params = list(hjust = 0, x = 0.05)))) %>%
gtable::gtable_add_grob(grobs = grid::rectGrob(gp = grid::gpar(fill = NA, lwd = 2)),
t = 2,
b = nrow(.),
l = 1,
r = ncol(.)) %>%
gtable::gtable_add_grob(grobs = grid::rectGrob(gp = grid::gpar(fill = NA, lwd = 2)),
t = 1,
l = 1,
r = ncol(.))
# align everything
p4a <- ((p0 | p1 | plot_table) + plot_layout(widths = c(1, 1, 0.5))) / (p2b | p3c) +
plot_layout(nrow = 2) +
plot_annotation(title = paste0("Metrics for dataset: ", obj_name))
# save & print plot
ggsave(filename = paste0("QC_", obj_name, ".pdf"),
plot = p4b,
device = "pdf",
path = "/blue/rbacher/j.leary/repos/scLANE_Analysis/Figures/QC_Plots/",
width = 13,
height = 9,
units = "in",
dpi = "retina")
print(p4a)
# cleanup
sink(tempfile())
rm(p0, p1, p2a, p2b, p3a, p3b, p3c, p4a, plot_table); gc(full = TRUE)
sink()
})
rm(obj_list)First let’s take a look at the reference dataset itself. Loading this one is a bit more complicated - it comes as part of the scVelo Python library, so we use the {reticulate} R package to access & load it before converting it to a SingleCellExperiment object using the {zellkonverter} package. Though the data comes pre-processed, we reprocess it using the same steps as we did on the other two datasets to ensure a good comparison. A necessary wrinkle here is the activation of a conda environment that I created previously & installed scVelo and its dependencies into.
reticulate::use_condaenv(condaenv = "HPG_venv",
conda = "/apps/conda/22.11.1/condabin/conda",
required = TRUE)
scvelo <- reticulate::import("scvelo")
adata <- scvelo$datasets$pancreas()
endo <- zellkonverter::AnnData2SCE(adata = adata)
endo@assays@data$X <- NULL
endo@assays@data$counts <- endo@assays@data$spliced
endo_data_clean <- endo[rowSums(SingleCellExperiment::counts(endo) > 0) >= 3, ]
endo_data_clean <- logNormCounts(endo_data_clean)
var_decomp <- modelGeneVar(endo_data_clean)
top2k_hvgs <- getTopHVGs(var_decomp, n = 2000)
endo_data_clean <- runPCA(endo_data_clean, subset_row = top2k_hvgs)
reducedDim(endo_data_clean, "PCAsub") <- reducedDim(endo_data_clean, "PCA")[, 1:30, drop = FALSE]
endo_data_clean <- runUMAP(endo_data_clean,
dimred = "PCAsub",
n_dimred = 1:30)
g <- buildSNNGraph(endo_data_clean,
use.dimred = "PCAsub",
k = 30)
clusters <- igraph::cluster_louvain(graph = g)$membership
colLabels(endo_data_clean) <- factor(clusters)
endo_data_clean <- as.Seurat(endo_data_clean,
counts = "counts",
data = "logcounts")We re-create the summary statistics table for the pancreas reference.
n_cells <- ncol(endo_data_clean)
n_genes <- nrow(endo_data_clean)
mean_count <- mean(endo_data_clean@assays$originalexp@counts)
med_count <- 0
sd_count <- sd(endo_data_clean@assays$originalexp@counts)
var_count <- sd_count^2 # faster
range_count <- range(endo_data_clean@assays$originalexp@counts)
sparsity_count <- mean(endo_data_clean@assays$originalexp@counts == 0)
summary_df <- data.frame(metric = c("Mean", "Median", "S.D.", "Variance", "Range", "Sparsity"),
value = c(round(mean_count, 2),
med_count,
round(sd_count, 2),
round(var_count, 2),
paste0("(", range_count[1], ", ", range_count[2], ")"),
paste0(round(sparsity_count, 4) * 100, "%")))
plot_table <- gridExtra::tableGrob(summary_df, rows = NULL,
cols = c("Metric", "Value"),
theme = gridExtra::ttheme_minimal(core = list(fg_params = list(hjust = 0, x = 0.05)),
colhead = list(fg_params = list(hjust = 0, x = 0.05)))) %>%
gtable::gtable_add_grob(grobs = grid::rectGrob(gp = grid::gpar(fill = NA, lwd = 2)),
t = 2,
b = nrow(.),
l = 1,
r = ncol(.)) %>%
gtable::gtable_add_grob(grobs = grid::rectGrob(gp = grid::gpar(fill = NA, lwd = 2)),
t = 1,
l = 1,
r = ncol(.))
gc(full = TRUE)Like the pancreas reference, this dataset is small enough that we don’t need to do any trickery to produce histograms.
p0 <- data.frame(x = as.numeric(endo_data_clean@assays$originalexp@counts)) %>%
ggplot(aes(x = x)) +
geom_histogram(fill = "dodgerblue") +
scale_y_continuous(labels = scales::label_scientific()) +
scale_x_continuous(labels = scales::label_comma()) +
labs(x = "Raw Expression", y = "Frequency") +
theme_classic(base_size = 14)
p1 <- data.frame(x = as.numeric(endo_data_clean@assays$originalexp@data)) %>%
ggplot(aes(x = x)) +
geom_histogram(fill = "forestgreen") +
scale_y_continuous(labels = scales::label_scientific()) +
labs(x = "Normalized Expression", y = "Frequency") +
theme_classic(base_size = 14)The UMAP & PCA plots are generated as before.
p2 <- data.frame(UMAP1 = endo_data_clean@reductions$UMAP@cell.embeddings[, 1],
UMAP2 = endo_data_clean@reductions$UMAP@cell.embeddings[, 2],
cluster = endo_data_clean$label) %>%
ggplot(aes(x = UMAP1, y = UMAP2, color = cluster)) +
geom_point() +
scale_color_manual(values = paletteer_d("ggsci::category20_d3")) +
labs(x = "UMAP 1", y = "UMAP 2", color = "Louvain Cluster") +
theme_classic(base_size = 14) +
theme(axis.text = element_blank(),
axis.ticks = element_blank()) +
guides(color = guide_legend(override.aes = list(size = 2)))
p3 <- data.frame(PC1 = endo_data_clean@reductions$PCA@cell.embeddings[, 1],
PC2 = endo_data_clean@reductions$PCA@cell.embeddings[, 2],
cluster = endo_data_clean$label) %>%
ggplot(aes(x = PC1, y = PC2, color = cluster)) +
geom_point() +
scale_color_manual(values = paletteer_d("ggsci::category20_d3")) +
labs(x = "PC 1", y = "PC 2", color = "Louvain Cluster") +
theme_classic(base_size = 14) +
theme(legend.position = "none",
axis.text = element_blank(),
axis.ticks = element_blank())
p3We save the final plot as a PDF & display it.
p4a <- (p0 | p1) / (p2 | p3) +
plot_layout(guides = "collect")
p4b <- (p4a | plot_table) +
plot_layout(ncol = 2, widths = c(3, 1)) +
plot_annotation(title = paste0("Metrics for Endocrinogenesis Reference Dataset"))
ggsave(filename = "QC_endo_reference.pdf",
plot = p4b,
device = "pdf",
path = "/blue/rbacher/j.leary/repos/scLANE_Analysis/Figures/QC_Plots/",
width = 13,
height = 8,
units = "in",
dpi = "retina")
p4bWe load the single-subject endocrinogenesis datasets.
# 100 cells
tar_load(endo_sim_DEG_01_CELLS_100)
tar_load(endo_sim_DEG_05_CELLS_100)
tar_load(endo_sim_DEG_10_CELLS_100)
tar_load(endo_sim_DEG_20_CELLS_100)
# 500 cells
tar_load(endo_sim_DEG_01_CELLS_500)
tar_load(endo_sim_DEG_05_CELLS_500)
tar_load(endo_sim_DEG_10_CELLS_500)
tar_load(endo_sim_DEG_20_CELLS_500)
# 1,000 cells
tar_load(endo_sim_DEG_01_CELLS_1000)
tar_load(endo_sim_DEG_05_CELLS_1000)
tar_load(endo_sim_DEG_10_CELLS_1000)
tar_load(endo_sim_DEG_20_CELLS_1000)
# 2,500 cells
tar_load(endo_sim_DEG_01_CELLS_2500)
tar_load(endo_sim_DEG_05_CELLS_2500)
tar_load(endo_sim_DEG_10_CELLS_2500)
tar_load(endo_sim_DEG_20_CELLS_2500)
# 5,000 cells
tar_load(endo_sim_DEG_01_CELLS_5000)
tar_load(endo_sim_DEG_05_CELLS_5000)
tar_load(endo_sim_DEG_10_CELLS_5000)
tar_load(endo_sim_DEG_20_CELLS_5000)
# coerce to list & process
obj_list <- purrr::map(ls(pattern = "endo_sim")[!grepl("balanced", ls(pattern = "endo_sim"))], function(sim) {
obj <- eval(as.symbol(sim))
obj <- as.Seurat(obj, counts = "counts", data = "logcounts")
obj@meta.data <- mutate(obj@meta.data,
perc_deg = paste0(as.numeric(stringr::str_remove(stringr::str_extract(sim, "endo_sim_DEG_.."), "endo_sim_DEG_")), "%"),
n_cells = as.character(ncol(obj)),
n_genes = as.character(nrow(obj)),
sce_name = sim)
return(obj)
})
rm(list = ls(pattern = "endo_sim")); gc(full = TRUE)We generate the plots for each dataset.
purrr::walk(obj_list, function(z) {
# gather metadata
obj_name <- z@meta.data$sce_name[1]
n_cells <- z@meta.data$n_cells[1]
n_genes <- z@meta.data$n_genes[1]
perc_deg <- z@meta.data$perc_deg[1]
# summary stat table
sparsity_count <- mean(z@assays$originalexp@counts == 0)
mean_count <- mean(z@assays$originalexp@counts)
med_count <- ifelse(sparsity_count > 0.5, 0, median(z@assays$originalexp@counts))
sd_count <- sd(z@assays$originalexp@counts)
var_count <- sd_count^2
range_count <- range(z@assays$originalexp@counts)
summary_df <- data.frame(metric = c("Mean", "Median", "S.D.", "Variance", "Range", "Sparsity"),
value = c(round(mean_count, 2),
round(med_count, 2),
round(sd_count, 2),
round(var_count, 2),
paste0("(", range_count[1], ", ", range_count[2], ")"),
paste0(round(sparsity_count, 4) * 100, "%")))
# create counts histogram
p0 <- data.frame(x = as.numeric(z@assays$originalexp@counts)) %>%
ggplot(aes(x = x)) +
geom_histogram(fill = "dodgerblue") +
scale_y_continuous(labels = scales::label_scientific()) +
scale_x_continuous(labels = scales::label_comma()) +
labs(x = "Raw Expression", y = "Frequency") +
theme_classic(base_size = 14)
# create log counts histogram
p1 <- data.frame(x = as.numeric(z@assays$originalexp@data)) %>%
ggplot(aes(x = x)) +
geom_histogram(fill = "forestgreen") +
scale_y_continuous(labels = scales::label_scientific()) +
labs(x = "Normalized Expression", y = "Frequency") +
theme_classic(base_size = 14)
# create UMAP by cluster
p2 <- data.frame(UMAP1 = z@reductions$UMAP@cell.embeddings[, 1],
UMAP2 = z@reductions$UMAP@cell.embeddings[, 2],
cluster = z$label) %>%
ggplot(aes(x = UMAP1, y = UMAP2, color = cluster)) +
geom_point() +
scale_color_manual(values = paletteer_d("ggsci::category20_d3")) +
labs(x = "UMAP 1", y = "UMAP 2", color = "Louvain Cluster") +
theme_classic(base_size = 14) +
theme(axis.text = element_blank(),
axis.ticks = element_blank()) +
guides(color = guide_legend(override.aes = list(size = 2)))
# create PCA of cell ordering
p3 <- data.frame(PC1 = z@reductions$PCA@cell.embeddings[, 1],
PC2 = z@reductions$PCA@cell.embeddings[, 2],
cell_time = z$cell_time_normed) %>%
ggplot(aes(x = PC1, y = PC2, color = cell_time)) +
geom_point() +
scale_color_gradientn(colors = paletteer_d("wesanderson::Zissou1")) +
labs(x = "PC 1", y = "PC 2", color = "True Ordering") +
theme_classic(base_size = 14) +
theme(axis.text = element_blank(),
axis.ticks = element_blank())
# table of simulation parameters
param_df <- data.frame(metric = c("Number of Cells", "Number of Genes", "% Dynamic Genes"),
value = c(as.character(n_cells), as.character(n_genes), perc_deg))
plot_table <- rbind(summary_df, param_df) %>%
gridExtra::tableGrob(rows = NULL,
cols = c("Metric", "Value"),
theme = gridExtra::ttheme_minimal(core = list(fg_params = list(hjust = 0, x = 0.05)),
colhead = list(fg_params = list(hjust = 0, x = 0.05)))) %>%
gtable::gtable_add_grob(grobs = grid::rectGrob(gp = grid::gpar(fill = NA, lwd = 2)),
t = 2,
b = nrow(.),
l = 1,
r = ncol(.)) %>%
gtable::gtable_add_grob(grobs = grid::rectGrob(gp = grid::gpar(fill = NA, lwd = 2)),
t = 1,
l = 1,
r = ncol(.))
# align everything
p4a <- (p0 | p1) / (p2 | p3) +
plot_layout(guides = "collect")
p4b <- (p4a | plot_table) +
plot_layout(ncol = 2, widths = c(3, 1)) +
plot_annotation(title = paste0("Metrics for dataset: ", obj_name))
# save & print plot
ggsave(filename = paste0("QC_", obj_name, ".pdf"),
plot = p4b,
device = "pdf",
path = "/blue/rbacher/j.leary/repos/scLANE_Analysis/Figures/QC_Plots/",
width = 13,
height = 8,
units = "in",
dpi = "retina")
print(p4b)
# cleanup
sink(tempfile())
rm(p0, p1, p2, p3, p4a, p4b, plot_table); gc(full = TRUE)
sink()
})rm(obj_list)Lastly, we bring the multi-subject simulated datasets into memory.
# 100 cells
tar_load(endo_sim_DEG_10_CELLS_100_balanced)
tar_load(endo_sim_DEG_20_CELLS_100_balanced)
tar_load(endo_sim_DEG_10_CELLS_100_unbalanced)
tar_load(endo_sim_DEG_20_CELLS_100_unbalanced)
# 500 cells
tar_load(endo_sim_DEG_10_CELLS_500_balanced)
tar_load(endo_sim_DEG_20_CELLS_500_balanced)
tar_load(endo_sim_DEG_10_CELLS_500_unbalanced)
tar_load(endo_sim_DEG_20_CELLS_500_unbalanced)
# 1,000 cells
tar_load(endo_sim_DEG_10_CELLS_1000_balanced)
tar_load(endo_sim_DEG_20_CELLS_1000_balanced)
tar_load(endo_sim_DEG_10_CELLS_1000_unbalanced)
tar_load(endo_sim_DEG_20_CELLS_1000_unbalanced)
# 2,500 cells
tar_load(endo_sim_DEG_10_CELLS_2500_balanced)
tar_load(endo_sim_DEG_20_CELLS_2500_balanced)
tar_load(endo_sim_DEG_10_CELLS_2500_unbalanced)
tar_load(endo_sim_DEG_20_CELLS_2500_unbalanced)
# 5,000 cells
tar_load(endo_sim_DEG_10_CELLS_5000_balanced)
tar_load(endo_sim_DEG_20_CELLS_5000_balanced)
tar_load(endo_sim_DEG_10_CELLS_5000_unbalanced)
tar_load(endo_sim_DEG_20_CELLS_5000_unbalanced)
# coerce to list & process
obj_list <- purrr::map(ls(pattern = "endo_sim*balanced"), function(sim) {
obj <- eval(as.symbol(sim))
obj <- as.Seurat(obj, counts = "counts", data = "logcounts")
obj@meta.data <- mutate(obj@meta.data,
perc_deg = paste0(as.numeric(stringr::str_remove(stringr::str_extract(sim, "endo_sim_DEG_.."), "endo_sim_DEG_")), "%"),
n_cells = as.character(ncol(obj)),
n_genes = as.character(nrow(obj)),
allocation = ifelse(grepl("_balanced", sim), "Balanced", "Unbalanced"),
sce_name = sim)
return(obj)
})
rm(list = ls(pattern = "endo_sim")); gc(full = TRUE)We generate the final set of plots.
purrr::walk(obj_list, function(z) {
# gather metadata
obj_name <- z@meta.data$sce_name[1]
n_cells <- z@meta.data$n_cells[1]
n_genes <- z@meta.data$n_genes[1]
perc_deg <- z@meta.data$perc_deg[1]
allocation <- z@meta.data$allocation[1]
n_subjects <- length(unique(z@meta.data$subject))
# summary stat table
sparsity_count <- mean(z@assays$originalexp@counts == 0)
mean_count <- mean(z@assays$originalexp@counts)
med_count <- ifelse(sparsity_count > 0.5, 0, median(z@assays$originalexp@counts))
sd_count <- sd(z@assays$originalexp@counts)
var_count <- sd_count^2
range_count <- range(z@assays$originalexp@counts)
summary_df <- data.frame(metric = c("Mean", "Median", "S.D.", "Variance", "Range", "Sparsity"),
value = c(round(mean_count, 2),
round(med_count, 2),
round(sd_count, 2),
round(var_count, 2),
paste0("(", range_count[1], ", ", range_count[2], ")"),
paste0(round(sparsity_count, 4) * 100, "%")))
# create counts histogram
p0 <- data.frame(x = as.numeric(z@assays$originalexp@counts)) %>%
ggplot(aes(x = x)) +
geom_histogram(fill = "dodgerblue") +
scale_y_continuous(labels = scales::label_scientific()) +
scale_x_continuous(labels = scales::label_comma()) +
labs(x = "Raw Expression", y = "Frequency") +
theme_classic(base_size = 14)
# create log counts histogram
p1 <- data.frame(x = as.numeric(z@assays$originalexp@data)) %>%
ggplot(aes(x = x)) +
geom_histogram(fill = "forestgreen") +
scale_y_continuous(labels = scales::label_scientific()) +
labs(x = "Normalized Expression", y = "Frequency") +
theme_classic(base_size = 14)
# create UMAP by cluster & subject
legend_clust <- ggpubr::get_legend(p = (data.frame(UMAP1 = z@reductions$UMAP@cell.embeddings[, 1],
UMAP2 = z@reductions$UMAP@cell.embeddings[, 2],
cluster = z$label) %>%
ggplot(aes(x = UMAP1, y = UMAP2, color = cluster)) +
geom_point() +
scale_color_manual(values = paletteer_d("ggsci::category20_d3")[1:length(unique(z$label))]) +
labs(color = "Louvain\nCluster") +
theme_classic(base_size = 14) + theme(legend.text.align = 0.5)))
legend_subj <- ggpubr::get_legend(p = (data.frame(UMAP1 = z@reductions$UMAP@cell.embeddings[, 1],
UMAP2 = z@reductions$UMAP@cell.embeddings[, 2],
subject = z$subject) %>%
ggplot(aes(x = UMAP1, y = UMAP2, color = subject)) +
geom_point() +
scale_color_manual(values = paletteer_d("ggsci::category20_d3")[(length(unique(z$label)) + 1):20]) +
labs(color = "Subject") +
theme_classic(base_size = 14)))
p2a <- data.frame(UMAP1 = z@reductions$UMAP@cell.embeddings[, 1],
UMAP2 = z@reductions$UMAP@cell.embeddings[, 2],
cluster = z$label,
subject = z$subject) %>%
tidyr::pivot_longer(cols = c(cluster, subject), names_to = "ident", values_to = "ident_value") %>%
mutate(ident = case_when(ident == "cluster" ~ "Louvain Cluster",
TRUE ~ "Subject")) %>%
ggplot(aes(x = UMAP1, y = UMAP2, color = ident_value, group = ident)) +
facet_wrap(~ident) +
geom_point() +
scale_color_manual(values = paletteer_d("ggsci::category20_d3")) +
labs(x = "UMAP 1", y = "UMAP 2") +
theme_classic(base_size = 14) +
theme(legend.position = "none",
axis.text = element_blank(),
axis.ticks = element_blank())
p2b <- (p2a + (wrap_elements(legend_clust) / wrap_elements(legend_subj)) +
plot_layout(ncol = 2, widths = c(4, 1)))
# create PCA of cell ordering
p3a <- data.frame(PC1 = z@reductions$PCA@cell.embeddings[, 1],
PC2 = z@reductions$PCA@cell.embeddings[, 2],
cell_time = z$cell_time_normed,
subject = z$subject) %>%
ggplot(aes(x = PC1, y = PC2, color = cell_time)) +
facet_wrap(~subject) +
geom_point() +
scale_color_gradientn(colors = paletteer_d("wesanderson::Zissou1")) +
labs(x = "PC 1", y = "PC 2", color = "True Ordering") +
theme_classic(base_size = 14) +
theme(axis.ticks = element_blank(),
axis.text = element_blank())
p3b <- data.frame(PC1 = z@reductions$PCA@cell.embeddings[, 1],
PC2 = z@reductions$PCA@cell.embeddings[, 2],
cell_time = z$cell_time_normed) %>%
ggplot(aes(x = PC1, y = PC2, color = cell_time)) +
geom_point() +
scale_color_gradientn(colors = paletteer_d("wesanderson::Zissou1")) +
labs(x = "PC 1", y = "PC 2", color = "True Ordering") +
theme_classic(base_size = 14) +
theme(axis.ticks = element_blank(),
axis.text = element_blank())
p3c <- (p3a | p3b) +
plot_layout(guides = "collect", widths = c(3, 2), ncol = 2)
# table of simulation parameters
param_df <- data.frame(metric = c("N Cells", "N Genes", "% Dynamic", "Allocation", "N Subjects"),
value = c(as.character(n_cells),
as.character(n_genes),
perc_deg,
allocation,
n_subjects))
plot_table <- rbind(summary_df, param_df) %>%
gridExtra::tableGrob(rows = NULL,
cols = c("Metric", "Value"),
theme = gridExtra::ttheme_minimal(core = list(fg_params = list(hjust = 0, x = 0.05)),
colhead = list(fg_params = list(hjust = 0, x = 0.05)))) %>%
gtable::gtable_add_grob(grobs = grid::rectGrob(gp = grid::gpar(fill = NA, lwd = 2)),
t = 2,
b = nrow(.),
l = 1,
r = ncol(.)) %>%
gtable::gtable_add_grob(grobs = grid::rectGrob(gp = grid::gpar(fill = NA, lwd = 2)),
t = 1,
l = 1,
r = ncol(.))
# align everything
p4a <- ((p0 | p1 | plot_table) + plot_layout(widths = c(1, 1, 0.5))) / (p2b | p3c) +
plot_layout(nrow = 2) +
plot_annotation(title = paste0("Metrics for dataset: ", obj_name))
# save & print plot
ggsave(filename = paste0("QC_", obj_name, ".pdf"),
plot = p4b,
device = "pdf",
path = "/blue/rbacher/j.leary/repos/scLANE_Analysis/Figures/QC_Plots/",
width = 13,
height = 9,
units = "in",
dpi = "retina")
print(p4a)
# cleanup
sink(tempfile())
rm(p0, p1, p2a, p2b, p3a, p3b, p3c, p4a, plot_table); gc(full = TRUE)
sink()
})
rm(obj_list)sessioninfo::session_info()## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.2.2 Patched (2022-11-10 r83330)
## os Ubuntu 22.04.1 LTS
## system x86_64, linux-gnu
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2023-01-14
## pandoc 2.9.2.1 @ /usr/bin/ (via rmarkdown)
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date (UTC) lib source
## abind 1.4-5 2016-07-21 [2] CRAN (R 4.2.0)
## AnnotationDbi 1.58.0 2022-04-26 [2] Bioconductor
## AnnotationFilter 1.20.0 2022-04-26 [2] Bioconductor
## AnnotationHub 3.4.0 2022-04-26 [2] Bioconductor
## assertthat 0.2.1 2019-03-21 [2] CRAN (R 4.2.0)
## backports 1.4.1 2021-12-13 [2] CRAN (R 4.2.0)
## base64url 1.4 2018-05-14 [2] CRAN (R 4.2.0)
## basilisk 1.8.1 2022-08-25 [2] Bioconductor
## basilisk.utils 1.8.0 2022-04-26 [2] Bioconductor
## beachmat 2.12.0 2022-04-26 [2] Bioconductor
## beeswarm 0.4.0 2021-06-01 [2] CRAN (R 4.2.0)
## bigassertr 0.1.5 2021-07-08 [2] CRAN (R 4.2.0)
## bigparallelr 0.3.2 2021-10-02 [2] CRAN (R 4.2.0)
## bigstatsr 1.5.6 2022-02-03 [2] CRAN (R 4.2.0)
## Biobase * 2.56.0 2022-04-26 [2] Bioconductor
## BiocFileCache 2.4.0 2022-04-26 [2] Bioconductor
## BiocGenerics * 0.42.0 2022-04-26 [2] Bioconductor
## BiocIO 1.6.0 2022-04-26 [2] Bioconductor
## BiocManager 1.30.18 2022-05-18 [1] CRAN (R 4.2.0)
## BiocNeighbors 1.14.0 2022-04-26 [2] Bioconductor
## BiocParallel 1.30.3 2022-06-05 [2] Bioconductor
## BiocSingular 1.12.0 2022-04-26 [2] Bioconductor
## BiocVersion 3.15.2 2022-03-29 [2] Bioconductor
## biomaRt 2.52.0 2022-04-26 [2] Bioconductor
## Biostrings 2.64.1 2022-08-18 [2] Bioconductor
## bit 4.0.4 2020-08-04 [2] CRAN (R 4.2.0)
## bit64 4.0.5 2020-08-30 [2] CRAN (R 4.2.0)
## bitops 1.0-7 2021-04-24 [2] CRAN (R 4.2.0)
## blob 1.2.3 2022-04-10 [2] CRAN (R 4.2.0)
## bluster 1.6.0 2022-04-26 [2] Bioconductor
## broom 1.0.1 2022-08-29 [2] CRAN (R 4.2.1)
## bslib 0.4.0 2022-07-16 [2] CRAN (R 4.2.0)
## cachem 1.0.6 2021-08-19 [2] CRAN (R 4.2.0)
## callr 3.7.2 2022-08-22 [2] CRAN (R 4.2.1)
## cellranger 1.1.0 2016-07-27 [2] CRAN (R 4.2.0)
## cli 3.4.1 2022-09-23 [2] CRAN (R 4.2.1)
## cluster 2.1.4 2022-08-22 [4] CRAN (R 4.2.1)
## codetools 0.2-18 2020-11-04 [4] CRAN (R 4.2.0)
## colorspace 2.0-3 2022-02-21 [2] CRAN (R 4.2.0)
## cowplot 1.1.1 2020-12-30 [2] CRAN (R 4.2.0)
## crayon 1.5.2 2022-09-29 [2] CRAN (R 4.2.1)
## curl 4.3.2 2021-06-23 [2] CRAN (R 4.2.0)
## data.table 1.14.2 2021-09-27 [2] CRAN (R 4.2.0)
## DBI 1.1.3 2022-06-18 [2] CRAN (R 4.2.0)
## dbplyr 2.2.1 2022-06-27 [2] CRAN (R 4.2.0)
## DelayedArray 0.22.0 2022-04-26 [2] Bioconductor
## DelayedMatrixStats 1.18.1 2022-09-27 [2] Bioconductor
## deldir 1.0-6 2021-10-23 [2] CRAN (R 4.2.0)
## digest 0.6.30 2022-10-18 [2] CRAN (R 4.2.1)
## dir.expiry 1.4.0 2022-04-26 [2] Bioconductor
## doParallel 1.0.17 2022-02-07 [2] CRAN (R 4.2.0)
## dplyr * 1.0.9 2022-04-28 [1] CRAN (R 4.2.0)
## dqrng 0.3.0 2021-05-01 [2] CRAN (R 4.2.0)
## edgeR 3.38.4 2022-08-07 [2] Bioconductor
## ellipsis 0.3.2 2021-04-29 [2] CRAN (R 4.2.0)
## ensembldb 2.20.2 2022-06-16 [2] Bioconductor
## evaluate 0.16 2022-08-09 [2] CRAN (R 4.2.0)
## ExperimentHub 2.4.0 2022-04-26 [2] Bioconductor
## fansi 1.0.3 2022-03-24 [2] CRAN (R 4.2.0)
## farver 2.1.1 2022-07-06 [2] CRAN (R 4.2.0)
## fastmap 1.1.0 2021-01-25 [2] CRAN (R 4.2.0)
## filelock 1.0.2 2018-10-05 [2] CRAN (R 4.2.0)
## fitdistrplus 1.1-8 2022-03-10 [2] CRAN (R 4.2.0)
## flock 0.7 2016-11-12 [2] CRAN (R 4.2.0)
## FNN 1.1.3.1 2022-05-23 [2] CRAN (R 4.2.0)
## forcats * 0.5.2 2022-08-19 [2] CRAN (R 4.2.1)
## foreach 1.5.2 2022-02-02 [1] CRAN (R 4.2.0)
## fs 1.5.2 2021-12-08 [2] CRAN (R 4.2.0)
## future * 1.28.0 2022-09-02 [2] CRAN (R 4.2.1)
## future.apply 1.9.1 2022-09-07 [2] CRAN (R 4.2.1)
## future.callr * 0.8.0 2022-04-01 [1] CRAN (R 4.2.0)
## gargle 1.2.1 2022-09-08 [2] CRAN (R 4.2.1)
## generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.0)
## GenomeInfoDb * 1.32.4 2022-09-06 [2] Bioconductor
## GenomeInfoDbData 1.2.8 2022-06-16 [2] Bioconductor
## GenomicAlignments 1.32.1 2022-07-24 [2] Bioconductor
## GenomicFeatures 1.48.4 2022-09-20 [2] Bioconductor
## GenomicRanges * 1.48.0 2022-04-26 [2] Bioconductor
## ggbeeswarm 0.6.0 2017-08-07 [2] CRAN (R 4.2.0)
## ggplot2 * 3.3.6 2022-05-03 [2] CRAN (R 4.2.0)
## ggrepel 0.9.1 2021-01-15 [1] CRAN (R 4.1.1)
## ggridges 0.5.4 2022-09-26 [2] CRAN (R 4.2.1)
## globals 0.16.1 2022-08-28 [2] CRAN (R 4.2.1)
## glue 1.6.2 2022-02-24 [2] CRAN (R 4.2.0)
## goftest 1.2-3 2021-10-07 [2] CRAN (R 4.2.0)
## googledrive 2.0.0 2021-07-08 [2] CRAN (R 4.2.0)
## googlesheets4 1.0.1 2022-08-13 [2] CRAN (R 4.2.0)
## gridExtra 2.3 2017-09-09 [2] CRAN (R 4.2.0)
## gtable 0.3.1 2022-09-01 [2] CRAN (R 4.2.1)
## haven 2.5.1 2022-08-22 [2] CRAN (R 4.2.1)
## here 1.0.1 2020-12-13 [2] CRAN (R 4.2.0)
## highr 0.9 2021-04-16 [2] CRAN (R 4.2.0)
## hms 1.1.2 2022-08-19 [2] CRAN (R 4.2.1)
## htmltools 0.5.3 2022-07-18 [2] CRAN (R 4.2.0)
## htmlwidgets 1.5.4 2021-09-08 [2] CRAN (R 4.2.0)
## httpuv 1.6.6 2022-09-08 [2] CRAN (R 4.2.1)
## httr 1.4.4 2022-08-17 [2] CRAN (R 4.2.1)
## ica 1.0-3 2022-07-08 [2] CRAN (R 4.2.0)
## igraph * 1.3.5 2022-09-22 [2] CRAN (R 4.2.1)
## interactiveDisplayBase 1.34.0 2022-04-26 [2] Bioconductor
## iotools 0.3-2 2021-07-23 [2] CRAN (R 4.2.0)
## IRanges * 2.30.1 2022-08-18 [2] Bioconductor
## irlba 2.3.5.1 2022-10-03 [2] CRAN (R 4.2.1)
## iterators 1.0.14 2022-02-05 [2] CRAN (R 4.2.0)
## jquerylib 0.1.4 2021-04-26 [2] CRAN (R 4.2.0)
## jsonlite 1.8.2 2022-10-02 [2] CRAN (R 4.2.1)
## KEGGREST 1.36.3 2022-07-12 [2] Bioconductor
## KernSmooth 2.23-20 2021-05-03 [4] CRAN (R 4.2.0)
## knitr 1.41 2022-11-18 [2] CRAN (R 4.2.2)
## labeling 0.4.2 2020-10-20 [2] CRAN (R 4.2.0)
## later 1.3.0 2021-08-18 [2] CRAN (R 4.2.0)
## lattice 0.20-45 2021-09-22 [4] CRAN (R 4.2.0)
## lazyeval 0.2.2 2019-03-15 [2] CRAN (R 4.2.0)
## leiden 0.4.3 2022-09-10 [2] CRAN (R 4.2.1)
## lifecycle 1.0.3 2022-10-07 [2] CRAN (R 4.2.1)
## limma 3.52.4 2022-09-27 [2] Bioconductor
## listenv 0.8.0 2019-12-05 [2] CRAN (R 4.2.0)
## lmtest 0.9-40 2022-03-21 [2] CRAN (R 4.2.0)
## locfit 1.5-9.6 2022-07-11 [2] CRAN (R 4.2.0)
## logging 0.10-108 2019-07-14 [1] CRAN (R 4.1.1)
## lubridate 1.8.0 2021-10-07 [2] CRAN (R 4.2.0)
## magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.0)
## MASS 7.3-58.1 2022-08-03 [4] CRAN (R 4.2.1)
## Matrix 1.5-1 2022-09-13 [2] CRAN (R 4.2.1)
## MatrixGenerics * 1.8.1 2022-06-26 [2] Bioconductor
## matrixStats * 0.62.0 2022-04-19 [2] CRAN (R 4.2.0)
## memoise 2.0.1 2021-11-26 [2] CRAN (R 4.2.0)
## metapod 1.4.0 2022-04-26 [2] Bioconductor
## mgcv 1.8-41 2022-10-21 [4] CRAN (R 4.2.1)
## mime 0.12 2021-09-28 [2] CRAN (R 4.2.0)
## miniUI 0.1.1.1 2018-05-18 [2] CRAN (R 4.2.0)
## modelr 0.1.9 2022-08-19 [2] CRAN (R 4.2.1)
## munsell 0.5.0 2018-06-12 [2] CRAN (R 4.2.0)
## nlme 3.1-160 2022-10-10 [4] CRAN (R 4.2.1)
## paletteer * 1.5.0 2022-10-19 [1] CRAN (R 4.2.1)
## parallelly 1.32.1 2022-07-21 [2] CRAN (R 4.2.0)
## patchwork * 1.1.2 2022-08-19 [1] CRAN (R 4.2.0)
## pbapply 1.5-0 2021-09-16 [2] CRAN (R 4.2.0)
## pillar 1.8.1 2022-08-19 [2] CRAN (R 4.2.1)
## pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 4.2.0)
## plotly 4.10.0 2021-10-09 [2] CRAN (R 4.2.0)
## plyr 1.8.7 2022-03-24 [2] CRAN (R 4.2.0)
## png 0.1-7 2013-12-03 [2] CRAN (R 4.2.0)
## polyclip 1.10-0 2019-03-14 [2] CRAN (R 4.2.0)
## prettyunits 1.1.1 2020-01-24 [2] CRAN (R 4.2.0)
## prismatic 1.1.1 2022-08-15 [1] CRAN (R 4.2.0)
## processx 3.7.0 2022-07-07 [2] CRAN (R 4.2.0)
## progress 1.2.2 2019-05-16 [2] CRAN (R 4.2.0)
## progressr 0.11.0 2022-09-02 [2] CRAN (R 4.2.1)
## promises 1.2.0.1 2021-02-11 [2] CRAN (R 4.2.0)
## ProtGenerics 1.28.0 2022-04-26 [2] Bioconductor
## ps 1.7.1 2022-06-18 [2] CRAN (R 4.2.0)
## purrr * 0.3.5 2022-10-06 [2] CRAN (R 4.2.1)
## R6 2.5.1 2021-08-19 [2] CRAN (R 4.2.0)
## ragg 1.2.3 2022-09-29 [2] CRAN (R 4.2.1)
## RANN 2.6.1 2019-01-08 [2] CRAN (R 4.2.0)
## rappdirs 0.3.3 2021-01-31 [2] CRAN (R 4.2.0)
## RColorBrewer 1.1-3 2022-04-03 [2] CRAN (R 4.2.0)
## Rcpp 1.0.9 2022-07-08 [2] CRAN (R 4.2.1)
## RcppAnnoy 0.0.19 2021-07-30 [2] CRAN (R 4.2.0)
## RcppZiggurat 0.1.6 2020-10-20 [2] CRAN (R 4.2.0)
## RCurl 1.98-1.9 2022-10-03 [2] CRAN (R 4.2.1)
## readr * 2.1.3 2022-10-01 [2] CRAN (R 4.2.1)
## readxl 1.4.1 2022-08-17 [2] CRAN (R 4.2.1)
## rematch2 2.1.2 2020-05-01 [2] CRAN (R 4.2.0)
## reprex 2.0.2 2022-08-17 [2] CRAN (R 4.2.1)
## reshape2 1.4.4 2020-04-09 [2] CRAN (R 4.2.0)
## restfulr 0.0.15 2022-06-16 [2] CRAN (R 4.2.0)
## reticulate 1.25 2022-05-11 [1] CRAN (R 4.2.0)
## Rfast 2.0.6 2022-02-16 [2] CRAN (R 4.2.0)
## rgeos 0.5-9 2021-12-15 [2] CRAN (R 4.2.0)
## rjson 0.2.21 2022-01-09 [2] CRAN (R 4.2.0)
## rlang 1.0.6 2022-09-24 [2] CRAN (R 4.2.1)
## rmarkdown 2.16 2022-08-24 [1] CRAN (R 4.2.0)
## ROCR 1.0-11 2020-05-02 [2] CRAN (R 4.2.0)
## rpart 4.1.19 2022-10-21 [4] CRAN (R 4.2.1)
## rprojroot 2.0.3 2022-04-02 [2] CRAN (R 4.2.0)
## Rsamtools 2.12.0 2022-04-26 [2] Bioconductor
## RSQLite 2.2.17 2022-09-10 [2] CRAN (R 4.2.1)
## rsvd 1.0.5 2021-04-16 [2] CRAN (R 4.2.0)
## rtracklayer 1.56.1 2022-06-23 [2] Bioconductor
## Rtsne 0.16 2022-04-17 [2] CRAN (R 4.2.0)
## rvest 1.0.3 2022-08-19 [2] CRAN (R 4.2.1)
## S4Vectors * 0.34.0 2022-04-26 [2] Bioconductor
## sass 0.4.2 2022-07-16 [1] CRAN (R 4.2.0)
## scaffold * 0.2.0 2022-09-04 [1] Github (rhondabacher/scaffold@714c319)
## ScaledMatrix 1.4.1 2022-09-11 [2] Bioconductor
## scales 1.2.1 2022-08-20 [2] CRAN (R 4.2.1)
## scater * 1.24.0 2022-04-26 [2] Bioconductor
## scattermore 0.8 2022-02-14 [2] CRAN (R 4.2.0)
## scran * 1.24.1 2022-09-11 [2] Bioconductor
## scRNAseq * 2.10.0 2022-04-28 [1] Bioconductor
## sctransform 0.3.4 2022-08-20 [1] CRAN (R 4.2.0)
## scuttle * 1.6.3 2022-08-23 [2] Bioconductor
## sessioninfo 1.2.2 2021-12-06 [2] CRAN (R 4.2.0)
## Seurat * 4.1.1 2022-05-02 [1] CRAN (R 4.2.0)
## SeuratObject * 4.1.2 2022-09-20 [2] CRAN (R 4.2.1)
## shiny 1.7.2 2022-07-19 [2] CRAN (R 4.2.0)
## SingleCellExperiment * 1.18.0 2022-04-26 [2] Bioconductor
## sp * 1.5-0 2022-06-05 [2] CRAN (R 4.2.0)
## sparseMatrixStats 1.8.0 2022-04-26 [2] Bioconductor
## spatstat.core 2.4-4 2022-05-18 [2] CRAN (R 4.2.0)
## spatstat.data 2.2-0 2022-04-18 [2] CRAN (R 4.2.0)
## spatstat.geom 2.4-0 2022-03-29 [2] CRAN (R 4.2.0)
## spatstat.random 2.2-0 2022-03-30 [2] CRAN (R 4.2.0)
## spatstat.sparse 2.1-1 2022-04-18 [2] CRAN (R 4.2.0)
## spatstat.utils 2.3-1 2022-05-06 [2] CRAN (R 4.2.0)
## statmod 1.4.37 2022-08-12 [2] CRAN (R 4.2.0)
## stringi 1.7.8 2022-07-11 [2] CRAN (R 4.2.0)
## stringr * 1.4.1 2022-08-20 [2] CRAN (R 4.2.1)
## SummarizedExperiment * 1.26.1 2022-04-29 [2] Bioconductor
## survival 3.4-0 2022-08-09 [4] CRAN (R 4.2.1)
## systemfonts 1.0.4 2022-02-11 [2] CRAN (R 4.2.0)
## tarchetypes * 0.7.0 2022-08-05 [1] CRAN (R 4.2.1)
## targets * 0.13.1 2022-08-05 [1] CRAN (R 4.2.0)
## tensor 1.5 2012-05-05 [2] CRAN (R 4.2.0)
## textshaping 0.3.6 2021-10-13 [2] CRAN (R 4.2.0)
## tibble * 3.1.8 2022-07-22 [2] CRAN (R 4.2.0)
## tidyr * 1.2.1 2022-09-08 [2] CRAN (R 4.2.1)
## tidyselect 1.2.0 2022-10-10 [2] CRAN (R 4.2.1)
## tidyverse * 1.3.2 2022-07-18 [1] CRAN (R 4.2.0)
## tzdb 0.3.0 2022-03-28 [2] CRAN (R 4.2.0)
## utf8 1.2.2 2021-07-24 [2] CRAN (R 4.2.0)
## uwot 0.1.14 2022-08-22 [2] CRAN (R 4.2.1)
## vctrs 0.5.0 2022-10-22 [2] CRAN (R 4.2.1)
## vipor 0.4.5 2017-03-22 [2] CRAN (R 4.2.0)
## viridis 0.6.2 2021-10-13 [2] CRAN (R 4.2.0)
## viridisLite 0.4.1 2022-08-22 [2] CRAN (R 4.2.1)
## withr 2.5.0 2022-03-03 [2] CRAN (R 4.2.0)
## wrswoR 1.1.1 2020-07-26 [1] CRAN (R 4.1.1)
## xfun 0.35 2022-11-16 [2] CRAN (R 4.2.2)
## XML 3.99-0.11 2022-10-03 [2] CRAN (R 4.2.1)
## xml2 1.3.3 2021-11-30 [2] CRAN (R 4.2.0)
## xtable 1.8-4 2019-04-21 [2] CRAN (R 4.2.0)
## XVector 0.36.0 2022-04-26 [2] Bioconductor
## yaml 2.3.5 2022-02-21 [2] CRAN (R 4.2.0)
## zellkonverter 1.6.5 2022-09-13 [1] Bioconductor
## zlibbioc 1.42.0 2022-04-26 [2] Bioconductor
## zoo 1.8-11 2022-09-17 [2] CRAN (R 4.2.1)
##
## [1] /home/j.leary/r_packages_default
## [2] /usr/local/lib/R/site-library
## [3] /usr/lib/R/site-library
## [4] /usr/lib/R/library
##
## ─ Python configuration ───────────────────────────────────────────────────────
## python: /home/j.leary/.conda/envs/HPG_venv/bin/python
## libpython: /home/j.leary/.conda/envs/HPG_venv/lib/libpython3.10.so
## pythonhome: /home/j.leary/.conda/envs/HPG_venv:/home/j.leary/.conda/envs/HPG_venv
## version: 3.10.8 | packaged by conda-forge | (main, Nov 22 2022, 08:26:04) [GCC 10.4.0]
## numpy: /home/j.leary/.conda/envs/HPG_venv/lib/python3.10/site-packages/numpy
## numpy_version: 1.23.5
## scvelo: /home/j.leary/.conda/envs/HPG_venv/lib/python3.10/site-packages/scvelo
##
## NOTE: Python version was forced by use_python function
##
## ──────────────────────────────────────────────────────────────────────────────